# coding:utf-8

# Scrape the "hot picks" (热点精选) section of https://news.qq.com/ with selenium
import requests
import time
from lxml import etree
from bs4 import BeautifulSoup
from selenium import webdriver


def get_news():
    """Fetch https://news.qq.com/ statically and print the '#List > ul.list' nodes.

    Returns the list of matched Tag objects (may well be empty).

    NOTE(review): the hot-news list on this page appears to be rendered by
    JavaScript, so this static fetch likely misses it — confirm; the
    selenium-based get_page_source() is the working approach.
    """
    url = 'https://news.qq.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}

    # timeout prevents the request from hanging forever; raise_for_status
    # surfaces HTTP errors instead of silently parsing an error page.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    nodes = soup.select('#List > ul.list')
    print(nodes)
    return nodes


def get_page_source():
    """Render https://news.qq.com/ in Chrome via selenium and print each
    '.detail > h3 > a' anchor (the hot-news headline links).

    Requires a Chrome/chromedriver installation reachable by selenium.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://news.qq.com/")
        # Crude wait for the JS-rendered list; an explicit WebDriverWait on
        # the target selector would be more reliable.
        time.sleep(2)
        page_source = driver.page_source
    finally:
        # Fix: always release the browser process, even if the page load fails.
        driver.quit()

    # Explicit parser avoids bs4's GuessedAtParserWarning and makes the
    # result independent of which parsers happen to be installed.
    soup = BeautifulSoup(page_source, 'html.parser')
    ele_list = soup.select('.detail > h3 > a')
    for ele in ele_list:
        print(type(str(ele)), str(ele))


if __name__ == "__main__":
    # Guarded entry point: run the selenium-based scraper only when executed
    # as a script, so importing this module has no side effects.
    # get_news()  # static-fetch variant; likely misses the JS-rendered list
    get_page_source()
